
* Session setup: start with no data in memory.
clear
* Raise Stata's matsize setting to 5000.
set matsize 5000
* NOTE(review): <directory> is a placeholder, not a real path. Point it at the folder holding the
* .dta files that this script reads and writes through relative paths before running.
cd <directory>
**********************************************************************************************************************
** Cleans the raw salary data (worker time-in / time-out records in the eas_MONTH_YEAR.dta files), builds worker-day and line-day minutes-at-work files, and merges them with the cleaned weather data (from weatherdata_clean.do).
**********************************************************************************************************************

** 2010 monthly files: no reshape is done here; the date is split into year / month / day
** and the in/out times are kept as found.
local months2010 jan feb mar apr may jun jul aug sep oct nov dec
foreach yr in 2010 {
	foreach mon of local months2010 {
		use "eas_`mon'_`yr'.dta", clear

		* Rows without a date are discarded before the calendar parts are derived.
		drop if date==.
		generate year  = year(date)
		generate month = month(date)
		generate day   = day(date)

		* Keep only the identifiers, the calendar parts and the two clock readings.
		keep unit tkn_no subdept intime outtime year month day

		* A row is usable only when both clock readings are present.
		drop if outtime==. | intime==.

		save "eas_`mon'_`yr'_reshapedlong.dta", replace
	}
}

** 2011-2013 monthly files: wide layout with one pair of columns per day (a*_in and a*_out).
** Each file is reshaped to long form with one intime/outtime pair per value of day.
local monthnames jan feb mar apr may jun jul aug sep oct nov dec
forvalues yr = 2011/2013 {
	foreach mon of local monthnames {
		use "eas_`mon'_`yr'.dta", clear

		* Year and month come from the date variable; rows without a date are discarded.
		drop if date==.
		generate year  = year(date)
		generate month = month(date)

		keep unit tkn_no year month subdept a*_in a*_out

		* aNN_in -> intimeNN and aNN_out -> outtimeNN, then strip one leading zero from the
		* suffix (presumably zero-padded day numbers) so that reshape can use it as j(day).
		rename a*_in     intime*
		rename a*_out    outtime*
		rename intime0*  intime*
		rename outtime0* outtime*

		reshape long intime outtime, i(unit tkn_no subdept year month) j(day)

		* A row is usable only when both clock readings are present.
		drop if outtime==. | intime==.

		save "eas_`mon'_`yr'_reshapedlong.dta", replace
	}
}


** Stack the reshaped 2011-2013 month files into a single dataset.
** NOTE(review): the 2010 "_reshapedlong" files written earlier in this script are not appended
** here. Confirm that leaving 2010 out of the sample is intended.
clear
local stackmonths jan feb mar apr may jun jul aug sep oct nov dec
foreach mon of local stackmonths {
	forvalues yr = 2011/2013 {
		append using "eas_`mon'_`yr'_reshapedlong.dta"
	}
}

* Restrict to the SEWING sub-department and discard rows whose in-time is exactly zero.
keep if subdept=="SEWING"
drop if intime==0

* Trim each clock reading separately: values below the 1st or above the 99th percentile
* are set to missing (the row itself is kept).
summarize intime, detail
replace intime = . if intime>r(p99) | intime<r(p1)

summarize outtime, detail
replace outtime = . if outtime>r(p99) | outtime<r(p1)

** Convert the clock readings to minutes. The readings are treated as numeric H.MM values:
** the part before the decimal point is multiplied by 60 and the part after it is added as minutes.

* --- in-time ---
* Work on a double copy rounded to two decimals so that storage noise does not reach the text form.
g double intimestr=intime
su intimestr intime
replace intimestr=round(intimestr,0.01)

* Always print exactly two decimals. With tostring's default format, 8.30 is written as "8.3"
* (parsed below as 3 minutes instead of 30) and 9.00 is written as "9" (no minutes part at all,
* so the minutes total came out missing).
tostring intimestr, replace force format(%9.2f)

* intimestr1 = text before the point, intimestr2 = text after it.
split intimestr, p(".")
destring intimestr1 intimestr2, replace

g timeinmin=60* intimestr1+ intimestr2

* Cross-check: rewrite the text as H:MM, parse it with ntimeofday and display how many rows
* disagree with the manual computation.
* NOTE(review): ntimeofday is not a built-in Stata command (presumably the user-written SSC
* package) and must be installed for this step to run.
replace intimestr=subinstr(intimestr,".",":",.)
ntimeofday intimestr, gen(ndatetimein) s(h mi ) n(minutes)
coun if ndatetimein!= timeinmin

* --- out-time: same conversion, without the ntimeofday cross-check ---
g double outtimestr=outtime
su outtimestr outtime
replace outtimestr=round(outtimestr,0.01)

tostring outtimestr, replace force format(%9.2f)

split outtimestr, p(".")
destring outtimestr1 outtimestr2, replace

g timeoutmin=60* outtimestr1+ outtimestr2

* Minutes between clock-in and clock-out. Rows where the out-time precedes the in-time are
* dropped; rows where either side is missing give a missing difference and are kept.
g minatwork=timeoutmin-timeinmin

drop if minatwork<0

** Worker-day file: attach the line mapping and save, then add the led dates, the unit-level
** file and the weather variables, and save the merged worker-day file.

* Copy of tkn_no under the key name used in the merge with the line-mapping file.
g tokenno=tkn_no

* mm==2 marks rows that exist only in the mapping file; they are dropped.
merge m:1 unit tokenno using "linemapping_tokenno_fromattendancedata", gen(mm)
drop if mm==2
save "workerdaily_inandouttime", replace

* mm stays in the saved file, but it has to be removed from memory here: the gen(mm) merge
* further down stops with "variable mm already defined" if the indicator still exists.
drop mm

* Keep only units found in the led month/year file (_merge is left in the data).
merge m:1 unit using "monthandyearledfromlineleveldata"
keep if _merge==3


* led = 1 once the observation's year+month/12 has reached the unit's yearled+monthled/12;
* unitled is the within-unit mean of led.
g monthy=year+(month/12)
destring yearled monthled, replace
g monthyled=yearled+(monthled/12)
g led=monthy>=monthyled
bys unit: egen unitled=mean(led)


* Unit-level file, matched rows only. It is presumably the source of the longitude/latitude
* keys used in the weather merge below; verify against that file.
merge m:1 unit using "tamu_allunitsmerge", gen(mm)
keep if mm==3
drop mm

* Daily weather by location, matched rows only; only the listed weather variables are brought in.
merge m:1 longitude latitude year month day using "weather_2010to2014_allweeklags.dta", gen(mmm) keepusing(meant himeant wbgtmeant relativehumidity precipitation)
keep if mmm==3
drop mmm
save "inandouttime_sewing_merged", replace


** Line-level file: collapse the worker-day records to one row per unit / BATCH / day, then
** add the led dates, the unit-level file and the weather variables.

use "workerdaily_inandouttime", clear

* Only rows with a non-missing rno are used.
* NOTE(review): rno and BATCH are presumably supplied by the line-mapping merge; confirm.
keep if rno!=.

* Minimum, maximum and mean of the in-time, out-time and minutes at work within each cell.
collapse ///
	(min)  minminatwork=minatwork mintimeoutmin=timeoutmin mintimeinmin=timeinmin ///
	(max)  maxminatwork=minatwork maxtimeoutmin=timeoutmin maxtimeinmin=timeinmin ///
	(mean) timeinmin timeoutmin minatwork, ///
	by(unit BATCH year month day)
count
save "linedaily_inandouttime", replace

use "linedaily_inandouttime", clear

* Keep only units found in the led month/year file (_merge stays in the data, equal to 3).
merge m:1 unit using "monthandyearledfromlineleveldata", keep(match)

* led = 1 once the observation's year+month/12 has reached the unit's yearled+monthled/12;
* unitled is the within-unit mean of led.
generate monthy = year + (month/12)
destring yearled monthled, replace
generate monthyled = yearled + (monthled/12)
generate led = monthy >= monthyled
bysort unit: egen unitled = mean(led)

* Unit-level file: matched rows only, no merge indicator left behind.
merge m:1 unit using "tamu_allunitsmerge", keep(match) nogenerate

* Daily weather by location: matched rows only, only the listed weather variables are brought in.
merge m:1 longitude latitude year month day using "weather_2010to2014_allweeklags.dta", ///
	keep(match) nogenerate ///
	keepusing(meant himeant wbgtmeant relativehumidity precipitation)

save "inandouttime_sewing_merged_linelevel", replace

